# coding:utf-8
# Convert a tab-separated corpus into the JSON-lines format used by UniLM.
# Input corpus read by the script entry point; each line is split on tabs
# and the first two fields are used.
path = r'/data/corpus_hj/synonymous_validation.txt'
# Output file the script entry point appends to: one JSON object per line.
f = r'/data/corpus_hj/synonymous_validation_decode.json'
import json


def read_write_file(path_a, f_a):
    """Convert a tab-separated corpus file to JSON lines, appending to ``f_a``.

    Every input line is stripped of surrounding whitespace and split on tabs.
    The first field becomes ``"src"`` and the second ``"tgt"``; any further
    fields are ignored. Each pair is written as one JSON object per line.

    Lines that do not yield at least two fields (blank lines, lines without a
    tab) are skipped instead of aborting the whole conversion; non-blank
    skipped lines are reported on stdout with their line number.

    Args:
        path_a: Path of the UTF-8 encoded input file.
        f_a: Path of the UTF-8 encoded output file. It is opened in append
            mode, so existing content is kept and repeated calls add
            further lines.

    Raises:
        OSError: If either file cannot be opened (e.g. the input is missing).
    """
    # Both handles are managed by ``with`` so they are closed even if an
    # exception is raised while converting.
    with open(path_a, 'r', encoding='utf-8') as reader:
        with open(f_a, 'a', encoding='utf-8') as writer:
            # Iterate the file lazily instead of loading every line up front.
            for line_no, line in enumerate(reader, start=1):
                if line_no % 1000 == 0:
                    print("执行到了第%d条" % line_no)
                stripped = line.strip()
                fields = stripped.split("\t")
                if len(fields) < 2:
                    # A record needs both a source and a target field.
                    if stripped:
                        print("Skipping malformed line %d: expected "
                              "'src<TAB>tgt'" % line_no)
                    continue
                record = {
                    "src": fields[0],
                    "tgt": fields[1],
                }
                # json.dumps defaults are kept: non-ASCII characters are
                # written as \uXXXX escapes, which json.loads decodes back.
                writer.write(json.dumps(record) + "\n")


# Script entry point: convert the default corpus using the module-level paths.
if __name__ == "__main__":
    read_write_file(path_a=path, f_a=f)
